Mental Illness

Load and clean data

mental_df = 
  read_csv("./data/mental_data.csv") %>% 
  janitor::clean_names() %>% 
  mutate(
    any_mental_num = any_mental_num / 1000000,
    any_mental_per = any_mental_per * 100,
    ser_mental_num = ser_mental_num / 1000000,
    ser_mental_per = ser_mental_per * 100,
    state_abb = state.abb[match(state, state.name)],
    region = state.region[match(state, state.name)]
  ) %>% 
  mutate(
    state_abb = replace(state_abb, state == "District of Columbia", "DC"))
## Rows: 51 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (9): any_mental_per, any_mental_num, ser_mental_per, ser_mental_num, tee...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Depression and anxiety

data cleaning

nhis = 
  read_csv("data/nhis_data01.csv") %>% 
  janitor::clean_names() %>% 
  mutate(
    worrx = recode_factor(worrx,'1' = "no", '2' = "yes"),
    worfreq = recode_factor(worfreq, '1' = "Daily", '2' = "Weekly", 
                            '3' = "Monthly", '4' = "A few times a year", 
                            '5' = "Never"),
    worfeelevl = recode_factor(worfeelevl, '1' = "A lot", 
                               '3' = "Somewhere between a little and a lot", 
                               '2' = "A little"),
    deprx = recode_factor(deprx, '1' = "no", '2' = "yes"),
    depfreq = recode_factor(depfreq, '1' = "Daily", '2' = "Weekly", 
                            '3' = "Monthly", '4' = "A few times a year", 
                            '5' = "Never"),
    depfeelevl = recode_factor(depfeelevl, '1' = "A lot", 
                               '3' = "Somewhere between a little and a lot", 
                               '2' = "A little")
  )
## Rows: 468212 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): NHISHID, NHISPID, HHX, FMX, PX
## dbl (28): YEAR, SERIAL, STRATA, PSU, HHWEIGHT, PERNUM, PERWEIGHT, SAMPWEIGHT...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
recent_yr_df = 
  nhis %>% 
  filter(year>2019)

Worries and anxiety

number of people reported taken medication for worried, nervous, or anxious feeings from 2015 to 2021.

nhis %>% 
  filter(year>=2015) %>%
  drop_na(worrx) %>% 
  group_by(year, worrx) %>% 
  summarize(wor_num = n()) %>% 
  pivot_wider(
    names_from = worrx,
    values_from = wor_num
  ) %>% 
  mutate(
    wor_percentage = yes/(no + yes)*100,
    text_label = str_c(yes, " out of ", no + yes)
  ) %>% 
  plot_ly(
    y = ~wor_percentage,
    x = ~year,
    color = ~year,
    type = "bar", 
    colors = "viridis",
    text = ~text_label) %>% 
  layout(title = "Percentage of people reported taken medication for worried, nervous, or anxious feeings each year",
         xaxis = list (title = ""),
         yaxis = list (title = "Percentage")
         ) %>% 
  hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Frequency of worries

nhis %>% 
  filter(year>=2015) %>% 
  drop_na(worfreq) %>% 
  group_by(year, worfreq) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  summarize(
     percentage=100 * count/sum(count),
     sum_count = sum(count),
     worfreq = worfreq,
     count=count
  ) %>% 
  mutate(
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfreq,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

worry level distribution.

nhis %>% 
  filter(year>=2015) %>%
  drop_na(worfeelevl) %>% 
  group_by(year, worfeelevl) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  summarize(
     percentage=100 * count/sum(count),
     sum_count = sum(count),
     worfeelevl = worfeelevl,
     count=count
  ) %>% 
  mutate(
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfeelevl,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    title = "Distribution of worry levels",
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

logistic regression

whether taken medication for worried, nervous, or anxious feelings is associated with covid-19 adjusting for sex and family income level.

glm(worrx ~ sex + poverty + cvddiag, 
                            family=binomial(link='logit'),
                            data = recent_yr_df) %>% 
    broom::tidy(conf.int = TRUE) %>% 
    mutate(
      odds_ratio = exp(estimate),
      low = exp(conf.low),
      high = exp(conf.high)
    ) %>% 
    select(term, odds_ratio, low, high) %>% 
  knitr::kable(digits=2)
term odds_ratio low high
(Intercept) 0.08 0.07 0.09
sex 2.09 1.98 2.20
poverty 0.98 0.98 0.99
cvddiag 1.07 1.03 1.12

Depression

number of people reported taken medication for depression from 2015 to 2021.

nhis %>% 
  filter(year>=2015) %>% 
  drop_na(deprx) %>%
  group_by(year, deprx) %>% 
  summarize(dep_num = n()) %>% 
  pivot_wider(
    names_from = deprx,
    values_from = dep_num
  ) %>% 
  mutate(
    dep_percentage = yes/(no + yes)*100,
    text_label = str_c(yes, " out of ", no + yes)
  ) %>% 
  plot_ly(
    y = ~dep_percentage,
    x = ~year,
    color = ~year,
    type = "bar", 
    colors = "viridis",
    text = ~text_label) %>% 
  layout(title = "Percentage of people reported taken medication for depression each year",
         xaxis = list (title = ""),
         yaxis = list (title = "Percentage")
         ) %>% 
  hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Frequency of feel depressed

nhis %>% 
  filter(year>=2015) %>%
  drop_na(depfreq) %>% 
  group_by(year, depfreq) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  summarize(
     percentage=100 * count/sum(count),
     sum_count = sum(count),
     depfreq = depfreq,
     count=count
  ) %>% 
  mutate(
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~depfreq,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    title = "Distribution of frequency of feel depressed",
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

depression level distribution.

nhis %>% 
  filter(year>=2015) %>%
  drop_na(depfeelevl) %>% 
  group_by(year, depfeelevl) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  mutate(
    percentage = 100 * count/sum(count),
    sum_count = sum(count),
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~depfeelevl,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    title = "Distribution of depression levels",
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

logistic regression

whether taken medication for depression is associated with covid-19 adjusting for sex and family income level.

glm(deprx ~ sex + poverty + cvddiag, 
                            family=binomial(link='logit'),
                            data = recent_yr_df) %>% 
    broom::tidy(conf.int = TRUE) %>% 
    mutate(
      odds_ratio = exp(estimate),
      low = exp(conf.low),
      high = exp(conf.high)
    ) %>% 
    select(term, odds_ratio, low, high) %>% 
  knitr::kable(digits=2)
term odds_ratio low high
(Intercept) 0.08 0.07 0.09
sex 2.08 1.97 2.20
poverty 0.97 0.97 0.98
cvddiag 1.05 1.00 1.10